library(tidyverse)
library(dplyr)
library(janitor)
library(plotly)
library(stringr)
library(plotly)
# Read the exported search-results table and run it through
# janitor::clean_names() so later steps can use the tidied column names.
data = clean_names(read_csv("Data/SearchResultsTable.csv"))
# Build the plotting table with parsed study start and completion dates.
#
# start_date / completion_date are text columns: the first three characters
# are used as the month abbreviation and the last four as the year. Rows in
# which any of those pieces comes from the "null" placeholder are dropped.
data_for_plot = 
  data %>% 
  select(nct_number, start_date, completion_date, phases, study_type, 
         enrollment, interventions) %>% 
  mutate(mm_start = str_sub(start_date,  1,  3), 
         yy_start = str_sub(start_date, -4, -1), 
         mm_end   = str_sub(completion_date,  1,  3),
         yy_end   = str_sub(completion_date, -4, -1)) %>%
  filter(mm_start != "nul" & mm_end != "nul" & 
           yy_start != "null" & yy_end != "null") %>%
  # as.Date() returns NA for a year-month string that has no day component,
  # so each date is anchored to the first day of its month. "%b" is matched
  # against the session locale's month abbreviations; the data are assumed to
  # use English abbreviations -- TODO confirm.
  mutate(date_start = as.Date(paste(yy_start, mm_start, "01", sep = "-"),
                              "%Y-%b-%d"),
         date_end   = as.Date(paste(yy_end, mm_end, "01", sep = "-"),
                              "%Y-%b-%d"))
  


# Scratch check of the date parsing: rebuild the month/year pieces, store the
# combined "YYYY-Mon" string in `date`, and print the first parsed values.
temp = 
  data %>% 
  select(nct_number, start_date, completion_date, phases, study_type, 
         enrollment, interventions) %>% 
  mutate(mm_start = str_sub(start_date,  1,  3), 
         yy_start = str_sub(start_date, -4, -1), 
         mm_end   = str_sub(completion_date,  1,  3),
         yy_end   = str_sub(completion_date, -4, -1)) %>%
  filter(mm_start != "nul" & mm_end != "nul" & 
           yy_start != "null" & yy_end != "null") %>%
  mutate(date = paste(yy_start, mm_start, sep = "-"))

# as.Date() needs a day whenever a month is given; without one it returns NA.
# "-01" is appended so the strings parse as the first of the month. head()
# is used so fewer than 10 rows does not pad the result with NA.
as.Date(paste0(head(temp$date, 10), "-01"), "%Y-%b-%d")

#Sys.setlocale("en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8")

# Plotly bar chart: one bar per product_name with height n, coloured by
# product_name, with the x-axis tick labels hidden.
# NOTE(review): neither `product_name` nor `n` is created by the
# data_for_plot pipeline above (it keeps nct_number, start_date,
# completion_date, phases, study_type, enrollment, interventions and adds the
# mm_*/yy_*/date_* columns), so this call looks like it cannot run as
# written -- confirm which columns were intended.
data_for_plot %>% 
    plot_ly(x = ~product_name, y = ~n, color = ~product_name, 
          type = "bar") %>%
  layout(xaxis = list(showticklabels = FALSE))

Separate eligible enrollment age into starting and ending age:

# Derive numeric eligibility bounds (age_start, age_end) from the free-text
# `age` column, plus a numeric enrollment count `n`.
#
# Parsing is purely positional. The layouts handled explicitly are:
#   "<N> Years and older   (Adult, Senior)"     -> age_end = 100
#   "up to <N> Years   (Child, Adult, Senior)"  -> age_start = 0, age_end = N
#   "Child, Adult, Senior"                      -> age_start = 0, age_end = 100
# Anything else is taken verbatim from the character positions and becomes NA
# if it is not a number.
# NOTE(review): units are never inspected, so a bound expressed in months
# would be read as years -- confirm against the data.
data_for_plot = 
  data %>% 
  select(rank, nct_number, phases, study_type, enrollment, age) %>% 
  mutate(
    # Lower bound: first three characters of `age`. "Chi" (from "Child, ...")
    # and "up " (from "up to ...") both mean there is no lower limit.
    age_start = str_sub(age, 1, 3),
    age_start = replace(age_start, age_start %in% c("Chi", "up "), "0")
  ) %>%
  # Keep only the text before the first space. fill = "right" pads values
  # that contain no space (e.g. "0") with NA in `remove` without a warning.
  separate(age_start, into = c("age_start", "remove"), sep = " ",
           fill = "right") %>%
  mutate(
    age_start = as.numeric(age_start),
    n         = as.numeric(enrollment),
    # Upper bound: characters 13-14 of `age`, then patched case by case.
    age_end   = str_sub(age, 13, 14),
    # Open-ended upper bounds are coded as 100:
    #   " o" - "NN Years and older ..."
    #   "ol" - "N Years and older ..." (one-digit starting age)
    #   ", " - "Child, Adult, Senior"
    #   "ar" - "up to 100 Years ..."
    age_end   = replace(age_end, age_end %in% c(" o", ", ", "ar", "ol"),
                        "100"),
    # "rs" - "up to NN Years ..." with a two-digit limit: the limit itself
    # sits at characters 7-8. %in% (rather than ==) keeps the mask free of
    # NA, which would otherwise make the subscripted assignment fail when
    # `age` is missing.
    age_end   = replace(age_end, age_end %in% "rs",
                        str_sub(age, 7, 8)[age_end %in% "rs"]),
    age_end   = as.numeric(age_end)
  ) %>%
  select(-remove) %>%
  # Order study IDs by starting age for plotting.
  mutate(nct_number = forcats::fct_reorder(nct_number, age_start))

Static dot plot with facets for study type. We can set study type as user input/filter in the final dashboard:

# Static scatter of enrollment (n) against eligible entry age, coloured by
# phase, with one panel per study type. Rows containing any NA are removed
# before plotting. Two point layers are drawn: a semi-transparent one at the
# exact coordinates and a horizontally jittered one.
# To restrict the plot to one study type, filter the data on
# study_type == "Interventional" or study_type == "Observational" first.
ggplot(na.omit(data_for_plot),
       aes(x = age_start, y = n, colour = phases)) +
  geom_point(alpha = 0.3) +
  geom_jitter(width = 0.5) +
  facet_wrap(vars(study_type)) +
  xlab("eligible entry age") +
  ylab("enrollment") +
  theme_bw()

Interactive dot plot (without faceting, but can implement if needed):

# Interactive scatter of enrollment (n) against eligible entry age, coloured
# by phase; hovering a point shows its study ID. Rows containing any NA are
# removed before plotting.
data_for_plot %>%
  mutate(hover_text = str_c("\nStudy ID: ", nct_number)) %>% 
  na.omit() %>% 
  # The scatter mode flag is "markers" (plural); "marker" is not a valid
  # flag for plotly's scatter trace.
  plot_ly(x = ~age_start, y = ~n, color = ~phases,
          text = ~hover_text,
          type = "scatter", mode = "markers")

Prepare data for line plot - convert into long format and group by nct_number:

# Long format for the line plots: each study contributes one row for
# age_start and one for age_end. `eligible_age` records which bound the row
# holds and `years` its value. The result stays grouped by nct_number and is
# sorted by `years`.
# pivot_longer() replaces the superseded gather(); the rows are the same, and
# only the relative order of rows with equal `years` can differ.
data_for_line_plot = 
  data_for_plot %>%
  pivot_longer(cols = c(age_start, age_end),
               names_to = "eligible_age",
               values_to = "years") %>%
  group_by(nct_number) %>%
  arrange(years)

Static plot with lines for eligible age:

# Static line plot: for every study (grouped by nct_number) a path is drawn
# through its eligible-age rows at the height of its enrollment (n), coloured
# by phase, with one panel per study type. Rows containing any NA are removed
# before plotting.
# To restrict the plot to one study type, filter the data on
# study_type == "Interventional" or study_type == "Observational" first.
ggplot(na.omit(data_for_line_plot),
       aes(x = years, y = n, colour = phases, group = nct_number)) +
  geom_path() +
  facet_wrap(vars(study_type)) +
  xlab("eligible age") +
  ylab("enrollment") +
  theme_bw()

Interactive plot with lines for eligible age:

# Same line plot as the static version, kept in `p` and then handed to
# ggplotly() to render it as an interactive widget. Rows containing any NA
# are removed before plotting.
# To restrict the plot to one study type, filter the data on
# study_type == "Interventional" or study_type == "Observational" first.
p = ggplot(na.omit(data_for_line_plot),
           aes(x = years, y = n, colour = phases, group = nct_number)) +
  geom_path() +
  facet_wrap(vars(study_type)) +
  xlab("eligible age") +
  ylab("enrollment") +
  theme_bw()

ggplotly(p)